# Replication code
library(nlme)
library(dplyr)
library(lubridate)
library(ggplot2)
library(tidyr)
library(gridExtra)
#library(egg)
library(ggpubr)

setwd("~/Dropbox/policing & COVID/analysis")

# City census data on population
city<-data.frame(read.csv("city_census.csv", header=TRUE, stringsAsFactors =FALSE))

# individual-level arrest data, cleaned 
arrests<-data.frame(read.csv("id_polcov_081821.csv", header=TRUE, stringsAsFactors = FALSE))

date <- read.csv("date.csv")
idind<- match(arrests$id, date$id)
arrests$newdate2 <- newdate2 <- date$newdate[idind]

# generating time variables
arrests$newdate<-mdy(arrests$newdate2)
arrests$year<-year(arrests$newdate)
arrests$yearwk<-week(arrests$newdate) # week in the year, range 1-53
arrests$week<-ifelse(arrests$year>2019,arrests$yearwk+52, arrests$yearwk) # continuous weeks variable, range 1-105

#roll up the arrests to CT-weeks
weekly <- arrests %>% group_by(juris,week) %>% summarise(minor=sum(minor),
                                                         yadult=sum(yadult),
                                                         adult=sum(adult),
                                                         elder=sum(elder),
                                                         black=sum(black),
                                                         white=sum(white),
                                                         hispanic=sum(hispanic),
                                                         other=sum(other),
                                                         asian=sum(asian),
                                                         male=sum(male),
                                                         female=sum(female),
                                                         bminor=sum(bminor),
                                                         wminor=sum(wminor),
                                                         hminor=sum(hminor),
                                                         ominor=sum(ominor),
                                                         byadult=sum(byadult),
                                                         wyadult=sum(wyadult),
                                                         hyadult=sum(hyadult),
                                                         oyadult=sum(oyadult),
                                                         bmyadult=sum(bmyadult),
                                                         bfyadult=sum(bfyadult),
                                                         wmyadult=sum(wmyadult),
                                                         wfyadult=sum(wfyadult),
                                                         hmyadult=sum(hmyadult),
                                                         hfyadult=sum(hfyadult),
                                                         omyadult=sum(omyadult),
                                                         ofyadult=sum(ofyadult),
                                                         belder=sum(belder),
                                                         bmelder=sum(bmelder),
                                                         bfelder=sum(bfelder),
                                                         welder=sum(welder),
                                                         wmelder=sum(wmelder),
                                                         wfelder=sum(wfelder),
                                                         helder=sum(helder),
                                                         hmelder=sum(hmelder),
                                                         hfelder=sum(hfelder),
                                                         oelder=sum(oelder),
                                                         omelder=sum(omelder),
                                                         ofelder=sum(ofelder),
                                                         bboy=sum(bboy),
                                                         bgirl=sum(bgirl),
                                                         wboy=sum(wboy),
                                                         wgirl=sum(wgirl),
                                                         hboy=sum(hboy),
                                                         hgirl=sum(hgirl),
                                                         oboy=sum(oboy),
                                                         ogirl=sum(ogirl),
                                                         bviolent=sum(bviolent),
                                                         wviolent=sum(wviolent),
                                                         hviolent=sum(hviolent),
                                                         oviolent=sum(oviolent),
                                                         bdrug=sum(bdrug),
                                                         wdrug=sum(wdrug),
                                                         hdrug=sum(hdrug),
                                                         odrug=sum(odrug),
                                                         bsocdis=sum(bsocdis),
                                                         wsocdis=sum(wsocdis),
                                                         hsocdis=sum(hsocdis),
                                                         osocdis=sum(osocdis),
                                                         bdriving=sum(bdriving),
                                                         wdriving=sum(wdriving),
                                                         hdriving=sum(hdriving),
                                                         odriving=sum(odriving),
                                                         total=n())

#setting up the city shell so that we have spaces for city-weeks with 0 counts 
a<-as.data.frame(table(city$juris))
b<-a[rep(a$Var1,105),]
b<-b[with(b, order(Var1)),]
b$week<-rep(seq(1:105),nrow(a))
names(b)[1]<-"juris"

x1<-merge(x=weekly, y=b, by=c("juris", "week"), all.y=TRUE)
x1[is.na(x1)]<-0
x1 <- merge(x=x1, y=city, by=c("juris"), all=TRUE)

###########################
#   temporal indicators   #
###########################

#generating a week variable that is negative before stayorder, positive after
x1 <- x1 %>% mutate(weeksp= case_when(
  juris=="Boston" ~ week-64,
  juris=="Charleston" ~ week-66,
  juris=="Pittsburgh" ~ week-64,
  juris=="New York City" ~ week-64
))

# generating a variable for school openings & closures 
x1 <- x1 %>% mutate(spring20_closure= case_when(
  juris=="Boston" ~ week-63,
  juris=="Charleston" ~ week-63,
  juris=="Pittsburgh" ~ week-63,
  juris=="New York City" ~ week-63
))

x1 <- x1 %>% mutate(fall20_opening= case_when(
  juris=="Boston" ~ week-90,
  juris=="Charleston" ~ week-88,
  juris=="Pittsburgh" ~ week-88,
  juris=="New York City" ~ week-90
))

# making a time-varying indicator of remote status
x1 <- x1 %>% mutate(remote= case_when(
  juris=="Boston" ~ week-63,
  juris=="Charleston" ~ week-88,
  juris=="Pittsburgh" ~ week-63,
  juris=="New York City" ~ week-90
))

# making a time period variable
x1 <- x1 %>% mutate(timechunk= case_when(
  juris=="Boston" & week<63 ~ 0,      # PRE PERIOD
  juris=="Charleston" & week<63 ~ 0,
  juris=="Pittsburgh" & week<63 ~ 0,
  juris=="New York City" & week<63 ~ 0,
  juris=="Boston" & week>=63 ~ 1,   # CLOSURE
  juris=="Charleston" & week>=63 & week<88 ~ 1,
  juris=="Pittsburgh" & week>=63 ~ 1,
  juris=="New York City" & week>=63 & week<90 ~ 1,
  juris=="Charleston" & week>88 ~ 2, # return for Charleston & NYC
  juris=="New York City" & week>90 ~ 2
))

x1<-na.omit(x1)
#x1<-x1[x1$juris=="New York City",]

#save(x1, file = "polrace.RData")
### In-Text Statistics
x1 %>% group_by(timechunk) %>% summarise(Minors=mean((minor/minorpop))*100000,
                                         Black=mean((bminor/bminorpop))*100000,
                                         White=mean((wminor/wminorpop))*100000,
                                         Latinx=mean((hminor/hminorpop))*100000)

### Main Text Figure 1
plot <- na.omit(x1) %>% group_by(week) %>% summarise(Minors=mean((minor/minorpop))*100000,
                                                     Black=mean((bminor/bminorpop))*100000,
                                                     White=mean((wminor/wminorpop))*100000,
                                                     Latinx=mean((hminor/hminorpop))*100000,
                                                     Other=mean((ominor/ominorpop))*100000,
                                                     BBoy=mean((bboy/bminormpop))*100000,
                                                     BGirl=mean((bgirl/bminorfpop))*100000,
                                                     YoungAdult=mean((yadult/yadultpop))*100000,
                                                     BYoungAdult=mean((byadult/byadultpop))*100000)
plot$Year<-c(rep("2019",52),rep("2020",53))

black<-plot[,c(1,2,6)]
plot$yrwk<-c(seq(1:52),seq(1:53))

plot<- plot %>% mutate(month= case_when(
  yrwk>=1 & yrwk<=4 ~ "Jan",
  yrwk>=5 & yrwk<=8 ~ "Feb",
  yrwk>=9 & yrwk<=13 ~ "Mar",
  yrwk>=14 & yrwk<=17 ~ "Apr",
  yrwk>=18 & yrwk<=21 ~ "May",
  yrwk>=22 & yrwk<=26 ~ "June",
  yrwk>=27 & yrwk<=30 ~ "July",
  yrwk>=31 & yrwk<=34 ~ "Aug",
  yrwk>=35 & yrwk<=39 ~ "Sept",
  yrwk>=40 & yrwk<=43 ~ "Oct",
  yrwk>=44 & yrwk<=47 ~ "Nov",
  yrwk>=48  ~ "Dec"
))

#colorpal <- ggsci::pal_jama()(2)
#names(colorpal) <- c("2019", "2020")
f1<-ggplot(plot, aes(x=yrwk, y=Minors, color=Year, group=factor(Year)))   + 
  geom_point()+geom_smooth(method="loess",span=0.25)+
  xlab(" ")+ geom_vline(xintercept=11, linetype="dashed") +
  ylab("Youth Arrests per 100,000") + 
  scale_color_manual(values=c("grey80", "#374E55FF"))+
  theme_classic()+ 
  theme( plot.title = element_text(size=14, hjust=0.5),legend.position = "bottom")+
  guides(color=guide_legend(override.aes=list(fill=NA)))+
  scale_x_continuous(breaks=c(1,9,18,27,36,45), labels=c("Jan","Mar","May","July","Sept","Nov"))
f1
ggsave("minorarrestsbin25.png", dpi=300)

### Main Text Figure 3
#colorpal <- ggsci::pal_jama()(3)
#names(colorpal) <- c("Black", "Latinx","White")
plot <- na.omit(x1) %>% group_by(weeksp) %>% summarise(Black=mean((bminor/bminorpop))*100000,
                                                       White=mean((wminor/wminorpop))*100000,
                                                       Latinx=mean((hminor/hminorpop))*100000,
)
plot_long<-gather(plot,Type,Rate,Black:Latinx)
f2<-ggplot(plot_long, aes(x=weeksp, y=Rate, color=Type, group=factor(Type)))   + 
  geom_point()+geom_smooth(method="loess",span=0.5)+
  xlab("Weeks Pre/Post Spring 2020 School Closures")+ geom_vline(xintercept=0, linetype="dashed") +
  ylab("Youth Arrests per 100,000") + 
  scale_color_manual(values=c("#DF8F44FF","#374E55FF","#00A1D5FF"))+
  theme_classic()+ 
  theme( plot.title = element_text(size=14, hjust=0.5),legend.title=element_blank(),legend.position = "bottom")+
  guides(color=guide_legend(override.aes=list(fill=NA)))
f2
ggsave("racexminorarrestsbin25.png", dpi=300)

### Supplemental Materials Figure 1
plot <- na.omit(x1) %>% group_by(weeksp) %>% summarise(Minor=mean((minor/minorpop))*100000,
                                                       YoungAdult=mean((yadult/yadultpop))*100000,
                                                       Adult=mean((adult/adultpop))*100000,
                                                       Elderly=mean((elder/elderpop))*100000
)
plot_long<-gather(plot,Type,Rate,Minor:Elderly)
plot_long$Type <- factor(plot_long$Type,levels = c("Minor", "YoungAdult", "Adult", "Elderly"))

s1<-ggplot(plot_long[plot_long$Rate<=150,], aes(x=weeksp, y=Rate, color=Type, group=factor(Type)))   + 
  geom_point()+geom_smooth(method="loess",span=0.25)+
  xlab("Weeks Pre/Post Spring 2020 School Closures")+ geom_vline(xintercept=0, linetype="dashed") +
  ylab("Arrests per 100,000") + 
  scale_color_manual(name = "Type", labels = c("<18", "18-24", "25-64", "65+"),values = c("#DF8F44FF","#374E55FF","#00A1D5FF","#B24745FF"))+
  theme_classic()+ 
  theme( plot.title = element_text(size=14, hjust=0.5),legend.title=element_blank(),legend.position = "bottom")+
  guides(color=guide_legend(override.aes=list(fill=NA)))
s1
ggsave("agexarrestsbin25.png", dpi=300)

### MODELING
# creating a month indicator
x1<- x1 %>% mutate(month= case_when(
  week>=1 & week<=4 ~ 1,
  week>=5 & week<=8 ~ 2,
  week>=9 & week<=13 ~ 3,
  week>=14 & week<=17 ~ 4,
  week>=18 & week<=21 ~ 5,
  week>=22 & week<=26 ~ 6,
  week>=27 & week<=30 ~ 7,
  week>=31 & week<=34 ~ 8,
  week>=35 & week<=39 ~ 9,
  week>=40 & week<=43 ~ 10,
  week>=44 & week<=47 ~ 11,
  week>=48 & week<=52 ~ 12,
  week>=53 & week<=57 ~ 1,
  week>=58 & week<=61 ~ 2,
  week>=62 & week<=65 ~ 3,
  week>=66 & week<=70 ~ 4,
  week>=71 & week<=74 ~ 5,
  week>=75 & week<=78 ~ 6,
  week>=79 & week<=83 ~ 7,
  week>=84 & week<=87 ~ 8,
  week>=88 & week<=91 ~ 9,
  week>=92 & week<=95 ~ 10,
  week>=96 & week<=99 ~ 11,
  week>=100  ~ 12
))

# first just comparing 2020 arrests to pre-period on average
library(MASS)
library(sandwich)

ses<- function(model) { 
  cov <- vcovHC(model, type = "HC0")
  std.err <- sqrt(diag(cov))
  q.val <- qnorm(0.975)
  r.est <- as.data.frame(cbind(
    Estimate = coef(model)
    , "Robust SE" = std.err
    , z = (coef(model)/std.err)
    , "Pr(>|z|) "= 2 * pnorm(abs(coef(model)/std.err), lower.tail = FALSE)
    , LL = coef(model) - q.val  * std.err
    , UL = coef(model) + q.val  * std.err
  ))
  return(r.est)
}

# MINORS
m1 <- glm.nb(minor ~  as.factor(timechunk) + as.factor(juris) + as.factor(month)
             + offset(log(minorpop)) , data=x1)
summary(m1)

# Minors By City
m1a <- glm.nb(minor ~  as.factor(timechunk) + as.factor(month)
              + offset(log(minorpop)) , data=x1[x1$juris=="Boston",])
summary(m1a)
m1b <- glm.nb(minor ~  as.factor(timechunk) + as.factor(month)
              + offset(log(minorpop)) , data=x1[x1$juris=="Charleston",])
summary(m1b)
m1c <- glm.nb(minor ~  as.factor(timechunk) + as.factor(month)
              + offset(log(minorpop)) , data=x1[x1$juris=="Pittsburgh",])
summary(m1c)
m1d <- glm.nb(minor ~  as.factor(timechunk) + as.factor(month)
              + offset(log(minorpop)) , data=x1[x1$juris=="New York City",])
summary(m1d)

bos_m<-ses(m1a)
cha_m<-ses(m1b)
pitt_m<-ses(m1c)
nyc_m<-ses(m1d)

### Supplemental Materials Figure 3
all<-rbind(bos_m[2,],cha_m[c(2:3),],pitt_m[2,],nyc_m[c(2:3),])
all$City<-c(rep("Boston",1),rep("Charleston",2),rep("Pittsburgh",1),rep("New York City",2))
all$cRR<-(exp(all$Estimate)-1)*100
all$cLL<-(exp(all$LL)-1)*100
all$cUL<-(exp(all$UL)-1)*100

all$Time<-c("Remote","Remote","Returned","Remote","Remote","Returned")

ggplot(all, aes(x=Time, y=cRR, col=City))+
  geom_point(position=position_dodge(width=0.2))+
  geom_errorbar(aes(ymin=cLL, ymax=cUL), width=0.2,position=position_dodge(width=0.2))+
  theme_minimal()+
  scale_color_manual(values = c("#DF8F44FF","#374E55FF","#00A1D5FF","#B24745FF"))+
  ylab("% Change in Youth Arrest Rates After School Closure")+xlab("Time Period")+
  geom_hline(yintercept=0, linetype="dashed", color="grey50")+
  theme(legend.position = "bottom")
ggsave("minors_changepost.png", dpi=300)

## Age group comparisons
m2 <- glm.nb(minor ~  as.factor(timechunk) + as.factor(juris) + as.factor(month)
             + offset(log(minorpop)) , data=x1)
summary(m2)
m2 <- glm.nb(yadult ~  as.factor(timechunk) + as.factor(juris) + as.factor(month)
             + offset(log(yadultpop)) , data=x1)
summary(m2)

m3 <- glm.nb(adult ~  as.factor(timechunk) + as.factor(juris) + as.factor(month)
             + offset(log(adultpop)) , data=x1)
summary(m3)

m4 <- glm.nb(elder ~  as.factor(timechunk) + as.factor(juris) + as.factor(month)
             + offset(log(elderpop)) , data=x1)
summary(m4)

minor<-ses(m1)
youngadult<-ses(m2)
adult<-ses(m3)
elderly<-ses(m4)

all<-rbind(minor[c(2:3),],youngadult[c(2:3),],adult[c(2:3),],elderly[c(2:3),])
all$Age<-c(rep("<18",2),rep("18-24",2),rep("25-64",2),rep("65+",2))
all$cRR<-(exp(all$Estimate)-1)*100
all$cLL<-(exp(all$LL)-1)*100
all$cUL<-(exp(all$UL)-1)*100

all$Time<-rep(c("Remote","Returned"),4)

# In-text model stats
all$IRR<-exp(all$Estimate)
all$IRR_LL<-exp(all$LL)
all$IRR_UL<-exp(all$UL)

### FIGURE 2
all$Age<- factor(all$Age,levels = c("<18", "18-24", "25-64", "65+"))

ggplot(all, aes(x=Time, y=cRR, col=Age))+
  geom_point(position=position_dodge(width=0.2))+
  geom_errorbar(aes(ymin=cLL, ymax=cUL), width=0.2,position=position_dodge(width=0.2))+
  scale_color_manual(values = c("#DF8F44FF","#374E55FF","#00A1D5FF","#B24745FF"))+
    theme_minimal()+
  ylab("% Change in Arrest Rates After School Closure")+xlab("Time Period")+
  geom_hline(yintercept=0, linetype="dashed", color="grey50")+
  theme(legend.position = "bottom")
ggsave("agegroups_changepost.png", dpi=300)

## race/ethnicity youth comparisons
m1 <- glm.nb(bminor ~  as.factor(timechunk) + as.factor(juris) + as.factor(month)
             + offset(log(bminorpop)) , data=x1)
summary(m1)

m2 <- glm.nb(hminor ~  as.factor(timechunk) + as.factor(juris) + as.factor(month)
             + offset(log(hminorpop)) , data=x1)
summary(m2)

m3 <- glm.nb(wminor ~  as.factor(timechunk) + as.factor(juris) + as.factor(month)
             + offset(log(wminorpop)) , data=x1)
summary(m3)

black<-ses(m1)
latinx<-ses(m2)
white<-ses(m3)

all<-rbind(black[c(2:3),],latinx[c(2:3),],white[c(2:3),])
all$Race<-c(rep("Black",2),rep("Latinx",2),rep("White",2))
all$cRR<-(exp(all$Estimate)-1)*100
all$cLL<-(exp(all$LL)-1)*100
all$cUL<-(exp(all$UL)-1)*100

all$Time<-rep(c("Remote","Returned"),3)

# In-text model stats
all$IRR<-exp(all$Estimate)
all$IRR_LL<-exp(all$LL)
all$IRR_UL<-exp(all$UL)

### Supplemental Materials Figure 4
all$Race<- factor(all$Race,levels = c("Black", "Latinx", "White"))

ggplot(all, aes(x=Time, y=cRR, col=Race))+
  geom_point(position=position_dodge(width=0.2))+
  geom_errorbar(aes(ymin=cLL, ymax=cUL), width=0.2,position=position_dodge(width=0.2))+
  scale_color_manual(values = c("#DF8F44FF","#374E55FF","#00A1D5FF"))+
  theme_minimal()+
  ylab("% Change in Arrest Rates After School Closure")+xlab("Time Period")+
  geom_hline(yintercept=0, linetype="dashed", color="grey50")+
  theme(legend.position = "bottom")
ggsave("racegroups_changepost.png", dpi=300)

## SECOND TEMPORAL COMPARISON
# COMPARING ARRESTS IN THESE 2 TIME PERIODS TO A REFERENT OF SUMMER 2019
post<-x1[x1$weeksp>=0,]
pre19<-x1[x1$week>=22 & x1$week<=34,]
x2<-rbind(post,pre19)

m1 <- glm.nb(minor ~  as.factor(timechunk) + as.factor(juris) + as.factor(month)
             + offset(log(minorpop)) , data=x2)
summary(m1)
m1a <- glm.nb(minor ~  as.factor(timechunk) + as.factor(month)
              + offset(log(minorpop)) , data=x2[x2$juris=="Boston",])
summary(m1a)
m1b <- glm.nb(minor ~  as.factor(timechunk) + as.factor(month)
              + offset(log(minorpop)) , data=x2[x2$juris=="Charleston",])
summary(m1b)
m1c <- glm.nb(minor ~  as.factor(timechunk) + as.factor(month)
              + offset(log(minorpop)) , data=x2[x2$juris=="Pittsburgh",])
summary(m1c)
m1d <- glm.nb(minor ~  as.factor(timechunk) + as.factor(month)
              + offset(log(minorpop)) , data=x2[x2$juris=="New York City",])
summary(m1d)

bos_m<-ses(m1a)
cha_m<-ses(m1b)
pitt_m<-ses(m1c)
nyc_m<-ses(m1d)

all<-rbind(bos_m[2,],cha_m[c(2:3),],pitt_m[2,],nyc_m[c(2:3),])
all$City<-c(rep("Boston",1),rep("Charleston",2),rep("Pittsburgh",1),rep("New York City",2))
all$cRR<-(exp(all$Estimate)-1)*100
all$cLL<-(exp(all$LL)-1)*100
all$cUL<-(exp(all$UL)-1)*100

all$Time<-c("Remote","Remote","Returned","Remote","Remote","Returned")

## Supplemental Materials Figure 2
ggplot(all, aes(x=Time, y=cRR, col=City))+
  geom_point(position=position_dodge(width=0.2))+
  geom_errorbar(aes(ymin=cLL, ymax=cUL), width=0.2,position=position_dodge(width=0.2))+
  scale_color_manual(values = c("#DF8F44FF","#374E55FF","#00A1D5FF","#B24745FF"))+
  theme_minimal()+
  ylab("% Change in Arrest Rates After School Closure")+xlab("Time Period")+
  geom_hline(yintercept=0, linetype="dashed", color="grey50")+
  theme(legend.position = "bottom")
ggsave("minors_changepost_19ref.png", dpi=300)
